*! version 5.0
* 13 August 2018
* NIDS
* Master Expenditure do file for Nids Wave 2

* THIS IS THE 4th EXPENDITURE DO FILE - PERFORMING IMPUTATIONS FOR MISSING DATA: 4 OF 6
* THIS DO FILE PERFORMS IMPUTATIONS FOR MISSING DATA ON THE RELEVANT EXPENDITURE VARIABLES

*=====================================================================================================================================
* GLOBALS FOR DATA FILES, DO FILES AND VERSION SUFFIXES

* DEFINED IN "Master Expenditure do file (1 of 6)"

version 12.0

*=====================================================================================================================================

* OPENING DATASET PREPARED IN PRIOR DO FILE "Expenditure - Preparing variables for imputation (3 of 6).DO"

* Switch off output paging so the do file runs without pausing
set more off

* $DataOUT is the output-folder global defined in "Master Expenditure do file (1 of 6)".
* A forward slash is used as the directory separator: Stata accepts it on Windows,
* macOS and Linux alike, whereas a backslash only works on Windows.
use "$DataOUT/prep.dta", clear

*-------------------------------------------------------------------------------------------------------------------------------------

*FOOD IMPUTATIONS

*The "regimp" routine
cap program drop regimp
program define regimp
	* Usage: regimp depvar regressors
	*
	* Regresses ln(depvar) on the regressors (run under the xi: prefix) and uses the
	* exponentiated fitted values to fill gaps in depvar.
	* Requires an existing variable named depvar_rec.
	* Creates three permanent variables:
	*   depvar_fit   exp() of the fitted log value, for every observation with a prediction
	*   depvar_imp   depvar, with the fitted value substituted where depvar is missing,
	*                depvar_rec is 1 and a fitted value exists
	*   depvar_data  source flag, defined only where depvar_rec is 1:
	*                0 Survey, 1 Imputed, 2 Not imputed

	* First token is the dependent variable, the remainder is the regressor list
	gettoken target covars : 0

	* Log-linear regression and fitted values held in temporary variables
	tempvar log_y log_yhat
	gen `log_y' = ln(`target')
	xi: regress `log_y' `covars'
	predict `log_yhat'

	* Back-transform the prediction to levels
	gen `target'_fit = exp(`log_yhat')

	* Keep survey values and fill eligible gaps with the fitted value
	gen `target'_imp = cond(`target'==. & `target'_rec==1 & `target'_fit!=., `target'_fit, `target')

	* Source flag: observed value, else fitted value available, else nothing available
	gen `target'_data = cond(`target'!=., 0, cond(`target'_fit!=., 1, 2)) if `target'_rec==1

	label define `target'_data 0 "Survey" 1 "Imputed" 2 "Not imputed"
	label values `target'_data `target'_data
end

* Recode the value -3 to system missing in every variable before imputing
mvdecode _all, mv(-3=.)

* Food line items fd1 to fd32.
* Each item needs companion variables with suffixes _d and _rec.
* Each item ends up with an _imp value variable and a labelled _data source flag.
forvalues q = 1/32 {
	foreach item of varlist fd`q' {
		quietly count if `item'_d == 1
		if r(N) < 100 {
			* Fewer than 100 observations with _d equal to 1: no regression is run,
			* the survey value is carried over and gaps are flagged as not imputed
			gen `item'_imp = `item'
			gen `item'_data = (`item'_imp == .) if `item'_rec == 1
			label define `item'_data 0 "Survey" 1 "Not imputed", modify
			label values `item'_data `item'_data
		}
		else {
			regimp `item' i.hometype i.homewalls i.homeroof homerooms i.hhrace hhrace_d ///
				hhedu hhedusq i.hhintmonth hhage hhage_d
			* When the mean of the source flag exceeds 0.5 the imputed values are
			* discarded again and code 1 is relabelled
			* NOTE(review): presumably a guard against mostly-imputed items - confirm
			quietly summarize `item'_data
			if r(mean) > 0.5 {
				replace `item'_imp = . if `item'_data == 1
				label define `item'_data 0 "Survey" 1 "Not imputed", modify
			}
		}
	}
}

* Household food total across all line items (rowtotal counts missing items as zero),
* blanked for households that did not respond or that belong to phase 2
egen expf = rowtotal(fd*_imp)
replace expf = . if hhresponse != 1 | phase2 == 1

/*Replacing missing food expenditure with one-shot food expenditure where appropriate.
This is done for households that did not provide any data for the food expenditure
line items.*/

* One-shot food expenditure (totfood): same imputation rule as the food line items.
* Requires totfood_d and totfood_rec; creates totfood_imp and totfood_data.
foreach item of varlist totfood {
	quietly count if `item'_d == 1
	if r(N) < 100 {
		* Fewer than 100 observations with _d equal to 1: carry the survey value
		* over unchanged and flag the gaps as not imputed
		gen `item'_imp = `item'
		gen `item'_data = (`item'_imp == .) if `item'_rec == 1
		label define `item'_data 0 "Survey" 1 "Not imputed", modify
		label values `item'_data `item'_data
	}
	else {
		regimp `item' i.hometype i.homewalls i.homeroof homerooms i.hhrace hhrace_d ///
			hhedu hhedusq i.hhintmonth hhage hhage_d
		* Discard the imputations again when the mean of the source flag exceeds 0.5
		quietly summarize `item'_data
		if r(mean) > 0.5 {
			replace `item'_imp = . if `item'_data == 1
			label define `item'_data 0 "Survey" 1 "Not imputed", modify
		}
	}
}
/*If expf==0 and the household was successfully interviewed, it means that all line items
were recorded as missing. For these households we use the one-shot expenditure instead.
This applies to all phase 1 households.*/

* Source flag for aggregate food expenditure, defined for responding households only:
* 1 where the line-item total is zero, 0 otherwise
gen expf_flg = (expf == 0) if hhresponse == 1
lab def expf_flg 0 "Survey" 1 "Imputed", add

* Households flagged 1 take the one-shot food value instead of the zero total
replace expf = totfood_imp if expf_flg == 1

* No food total is kept for non-responding or phase 2 households
replace expf = . if hhresponse != 1 | phase2 == 1

* Responding phase 2 households are flagged as having no data
replace expf_flg = 2 if phase2 == 1 & hhresponse == 1
lab def expf_flg 0 "Survey" 1 "Imputed" 2 "No data", modify
lab val expf_flg expf_flg

********************************************************************

*NON-FOOD IMPUTATIONS

* Non-food line items nf1 to nf53.
* Each item needs companion variables with suffixes _d and _rec.
* Each item ends up with an _imp value variable and a labelled _data source flag.
forvalues q = 1/53 {
	foreach item of varlist nf`q' {
		quietly count if `item'_d == 1
		if r(N) < 100 {
			* Fewer than 100 observations with _d equal to 1: no regression is run,
			* the survey value is carried over and gaps are flagged as not imputed
			gen `item'_imp = `item'
			gen `item'_data = (`item'_imp == .) if `item'_rec == 1
			label define `item'_data 0 "Survey" 1 "Not imputed", modify
			label values `item'_data `item'_data
		}
		else {
			regimp `item' i.hometype i.homewalls i.homeroof homerooms i.hhrace hhrace_d ///
				hhedu hhedusq i.hhintmonth hhage hhage_d
			* Discard the imputations again when the mean of the source flag exceeds 0.5
			quietly summarize `item'_data
			if r(mean) > 0.5 {
				replace `item'_imp = . if `item'_data == 1
				label define `item'_data 0 "Survey" 1 "Not imputed", modify
			}
		}
	}
}

* Household non-food total across all line items (rowtotal counts missing items as zero)
egen expnf = rowtotal(nf*_imp)

/*A total of exactly zero means every non-food component is zero or missing, so it is
treated as no data. According to the original note this covers households that are
non-response, are in phase 2, or have setmis==1 - that cannot be verified from this
do file alone.*/
replace expnf = . if expnf == 0

/*WE DO NOT HAVE A ONE-SHOT NON-FOOD TO FALL BACK ON, SO WE USE THE DATA THAT WE HAVE FOR AGGREGATE
NON-FOOD EXPENDITURE IN ORDER TO IMPUTE A ONE-SHOT FOR THOSE HOUSEHOLDS THAT DON'T.*/

* Build a pseudo one-shot non-food variable from the aggregate, together with the
* companion variables the imputation rule expects:
*   totnonfood_rec  1 for responding households, 0 otherwise
*   totnonfood      the aggregate non-food total
*   totnonfood_d    1 where that total is not system missing, 0 otherwise
gen totnonfood = expnf
gen totnonfood_d = (totnonfood != .)
gen totnonfood_rec = (hhresponse == 1)

* Same imputation rule as the line items; creates totnonfood_imp and totnonfood_data
foreach item of varlist totnonfood {
	quietly count if `item'_d == 1
	if r(N) < 100 {
		* Fewer than 100 observations with _d equal to 1: carry the value over
		* unchanged and flag the gaps as not imputed
		gen `item'_imp = `item'
		gen `item'_data = (`item'_imp == .) if `item'_rec == 1
		label define `item'_data 0 "Survey" 1 "Not imputed", modify
		label values `item'_data `item'_data
	}
	else {
		regimp `item' i.hometype i.homewalls i.homeroof homerooms i.hhrace hhrace_d ///
			hhedu hhedusq i.hhintmonth hhage hhage_d
		* Discard the imputations again when the mean of the source flag exceeds 0.5
		quietly summarize `item'_data
		if r(mean) > 0.5 {
			replace `item'_imp = . if `item'_data == 1
			label define `item'_data 0 "Survey" 1 "Not imputed", modify
		}
	}
}
							
* Source flag for aggregate non-food expenditure, set for responding households:
* 0 where a line-item total exists, 1 where it is missing and has to be filled in
gen expnf_flg=.
replace expnf_flg=0 if expnf!=. & hhresponse==1
replace expnf_flg=1 if expnf==. & hhresponse==1
lab def expnf_flg 0 "Survey" 1 "Imputed", add
* Attach the non-food label set; the food label expf_flg was attached here by mistake
lab val expnf_flg expnf_flg

*Now include the one-shot for all except phase 2 households
replace expnf=totnonfood_imp if expnf==. & hhresponse==1 & phase2!=1

*Flagging the phase 2 households as not having non-food data
replace expnf_flg=2 if phase2==1
lab def expnf_flg 0 "Survey" 1 "Imputed" 2 "No data", modify
* The flag is left undefined for households that did not respond
replace expnf_flg=. if hhresponse!=1
lab val expnf_flg expnf_flg

*IMPUTING FOR RENTAL EXPENDITURE AND TOTAL ONE-SHOT EXPENDITURE

* Rental payments (rentpay) and total one-shot expenditure (totexp): same imputation
* rule as the line items. Each variable needs companions with suffixes _d and _rec
* and ends up with an _imp value variable and a labelled _data source flag.
foreach item of varlist rentpay totexp {
	quietly count if `item'_d == 1
	if r(N) < 100 {
		* Fewer than 100 observations with _d equal to 1: carry the survey value
		* over unchanged and flag the gaps as not imputed
		gen `item'_imp = `item'
		gen `item'_data = (`item'_imp == .) if `item'_rec == 1
		label define `item'_data 0 "Survey" 1 "Not imputed", modify
		label values `item'_data `item'_data
	}
	else {
		regimp `item' i.hometype i.homewalls i.homeroof homerooms i.hhrace hhrace_d ///
			hhedu hhedusq i.hhintmonth hhage hhage_d
		* Discard the imputations again when the mean of the source flag exceeds 0.5
		quietly summarize `item'_data
		if r(mean) > 0.5 {
			replace `item'_imp = . if `item'_data == 1
			label define `item'_data 0 "Survey" 1 "Not imputed", modify
		}
	}
}
			
* Final rental expenditure and its source flag, copied from the rentpay imputation
gen rentexpend=rentpay_imp
gen rentexpend_flg=rentpay_data

* Final one-shot total expenditure and its source flag, copied from the totexp imputation
gen oneshotexp=totexp_imp
gen oneshotexp_flg=totexp_data

* Save the imputed dataset to the $DataOUT folder.
* A forward slash is used as the directory separator: Stata accepts it on Windows,
* macOS and Linux alike, whereas a backslash only works on Windows.
save "$DataOUT/imputed.dta", replace

* end of do file
*========================================================================================================================================

